# -*- coding: utf-8 -*-
"""
@Time    : 2020/12/01
@Author  : xinghen
@File    : nh_dim_weight_cal.py
"""
import logging
from typing import Dict

import numpy as np
import pandas as pd
from pandas import DataFrame
from statsmodels.formula.api import ols  # load the OLS model

from core.data_processing.indicator_dic import IndicatorDic
from core.data_processing.processing_year_data import NhDataProcessing
from core.moduls.nh_dim_correction import NhDimCorrection
from core.moduls.nh_dim_gra_cal import NhDimGraCal
from core.utils.nh_dea_utils import NhDeaUtils


class NhDimWeightCal:

    @staticmethod
    def multiple_correlation_cal(data_cal):
        """
        Build multiple-correlation based weights for every dimension under every key of ``data_cal``.

        :param data_cal: mapping-like object; each key (one per year, according to the original
            description) maps to a container that can be indexed by the keys of ``IndicatorDic.dim_dic``
        :return: dict ``{key: {dim_key: DataFrame}}``; each DataFrame is the result of
            ``dim_multiple_correlation_cal`` with its axes swapped, i.e. the input column names end up
            on the index
        """
        dimension_keys = IndicatorDic.dim_dic.keys()  # every dimension key

        def _swap_axes(frame):
            # Rebuild the frame with rows and columns exchanged.
            # FIXME (carried over from the original): the column names should be restored to the
            # previous names.
            return pd.DataFrame(frame.values.T, columns=frame.index, index=frame.columns)

        weights_by_year = {}
        for year in data_cal.keys():
            yearly_data = data_cal[year]
            # One weight frame per dimension, computed from that dimension's slice of the data.
            weights_by_year[year] = {
                dim_key: _swap_axes(NhDimWeightCal.dim_multiple_correlation_cal(yearly_data[dim_key]))
                for dim_key in dimension_keys
            }
        return weights_by_year

    @staticmethod
    def variable_coefficient_cal(data_cal):
        """
        Compute coefficient-of-variation weights for every dimension under every key of ``data_cal``.

        For each dimension the coefficient of variation (standard deviation divided by mean) is
        computed and then divided by its own sum, so the weights of one dimension add up to 1.

        :param data_cal: mapping-like object; each key (one per year, according to the original
            description) maps to a container that can be indexed by the keys of ``IndicatorDic.dim_dic``
        :return: dict ``{key: {dim_key: weights}}`` where ``weights`` is whatever
            ``std() / mean()`` normalised by its sum yields for that dimension's data
            (a Series when the dimension data is a DataFrame)
        """
        dim_dic_keys = IndicatorDic.dim_dic.keys()  # every dimension key
        year_dim_by_w = {}  # coefficient-of-variation weights per year and dimension
        for year in data_cal.keys():
            con_data = data_cal[year]
            # The per-year dict has to exist before it is filled; assigning into a missing
            # ``year_dim_by_w[year]`` raised KeyError on the very first dimension.
            dim_weights = {}
            for dim_item in dim_dic_keys:
                dim_data = con_data[dim_item]
                dim_by_w = dim_data.std() / dim_data.mean()  # coefficient of variation
                dim_weights[dim_item] = dim_by_w / dim_by_w.sum()  # share of the total
            year_dim_by_w[year] = dim_weights
        return year_dim_by_w

    @staticmethod
    def dim_db_correct_cal(all_year_data, all_year_dim_w, zrbf_data):
        """
        Compute dimension weights from the doubly corrected GRA results of all years.

        For every year the per-dimension GRA result is computed, corrected with the reward/punishment
        data and then corrected again with the DEA result of the natural-endowment data. The corrected
        results of all years are concatenated and handed to the weighting method selected by
        ``IndicatorDic.correct_method``.

        :param all_year_data: mapping-like object keyed by year; passed to
            ``NhDataProcessing.every_year_data``
        :param all_year_dim_w: per-year dimension weights, indexed by the same year keys
        :param zrbf_data: container with a ``"data"`` entry indexed by year and a ``"pjxs"`` entry,
            both forwarded to ``NhDeaUtils.dea_py``
        :return: result of ``dim_multiple_correlation_cal`` when the configured method equals
            ``IndicatorDic.MULTIPLE_CORRELATION_W``, otherwise of ``dim_variable_coefficient_cal``
        """
        correct_method = IndicatorDic.correct_method
        years = list(all_year_data.keys())

        yearly_results = []  # doubly corrected result of each year
        if years:
            # These two lookups do not depend on the year, so they are computed once instead of once
            # per iteration. NOTE(review): assumes NhDataProcessing.every_year_data has no side
            # effects that callers rely on being repeated - confirm.
            every_year_data = NhDataProcessing.every_year_data(all_year_data, "check_indicator")
            every_year_rp_data = NhDataProcessing.every_year_data(all_year_data, "jc_indicator")

            for year in years:
                # GRA result of every dimension for this year
                dim_gra_result = NhDimGraCal().dim_gra_cal(every_year_data[year], all_year_dim_w[year])
                # reward/punishment correction
                rp_correct_result = NhDimCorrection().reward_punish_correct(dim_gra_result,
                                                                            every_year_rp_data[year])
                # DEA result of the natural-endowment indicators
                dea_result = NhDeaUtils().dea_py(zrbf_data["data"][year], zrbf_data["pjxs"])
                # double correction
                yearly_results.append(NhDimCorrection().dea_correct(rp_correct_result, dea_result))

        # Concatenate once at the end; pd.concat rejects an empty list, so fall back to the empty
        # frame the loop-based accumulation used to produce.
        if yearly_results:
            all_year_db_correct_result = pd.concat(yearly_results)
        else:
            all_year_db_correct_result = pd.DataFrame()

        if correct_method == IndicatorDic.MULTIPLE_CORRELATION_W:
            # multiple-correlation based weights
            return NhDimWeightCal.dim_multiple_correlation_cal(all_year_db_correct_result)
        # coefficient-of-variation based weights
        return NhDimWeightCal.dim_variable_coefficient_cal(all_year_db_correct_result)

    @staticmethod
    def dim_multiple_correlation_cal(con_data):
        c_key = con_data.columns
        mc_weight = []
        formula = ""
        for i in range(len(c_key)):
            formula = c_key[i] + "~ "
            for j in range(len(c_key)):
                if i != j:
                    formula = formula + c_key[j] + "+"
            formula = formula[: -1]
            try:
                lm = ols(formula, data=con_data).fit()
                mc_weight.append(np.square(lm.rsquared))
            except:
                mc_weight.append(0)
        mc_pd = pd.DataFrame(mc_weight)  # FIXME 需要设置每列的名字还为之前的名字
        mc_pd = (1 / mc_pd) / ((1 / mc_pd).sum())
        mc_pd = pd.DataFrame(mc_pd.values.T, columns=c_key, index=mc_pd.columns)  # FIXME 需要设置每列的名字还为之前的名字
        return mc_pd

        # dim_mc_weight_result = pd.DataFrame()  # 按维度计算复相关系数
        # dim_dic_keys = IndicatorDic.dim_dic.keys()  # all dim key
        # mc_result = []
        # for dim_item in dim_dic_keys:  # 获取每个维度数据，计算权重
        #     dim_corr_w = con_data[dim_item].corr()  # type: pd.DataFrame
        #     dim_corr_w_h = dim_corr_w.apply(lambda x: x.mean(), axis=1)
        #     dim_corr_pc_w = (1 / dim_corr_w_h) / ((1 / dim_corr_w_h).sum())  # type: pd.DataFrame
        #     mc_result.append(dim_corr_pc_w)

        # return dim_mc_weight_result

    @staticmethod
    def dim_variable_coefficient_cal(con_data):
        """
        Derive normalised dimension weights from the coefficient of variation.

        The coefficient of variation (standard deviation divided by mean) is computed for the column
        of every dimension key in ``IndicatorDic.dim_dic`` and divided by the sum over all dimensions,
        so the weights add up to 1.

        The previous loop overwrote its result on every pass, so only the last dimension was returned;
        with one column per dimension that value was a scalar divided by itself.

        NOTE(review): assumes ``con_data`` has one flat column per dimension key, as
        ``dim_multiple_correlation_cal`` needs for the same input - confirm against the callers.

        :param con_data: DataFrame containing a column for every key of ``IndicatorDic.dim_dic``
        :return: single-row DataFrame (row label ``0``) with one column per dimension key holding the
            weights, the same layout ``dim_multiple_correlation_cal`` returns
        """
        dim_keys = list(IndicatorDic.dim_dic.keys())  # every dimension key
        dim_data = con_data[dim_keys]
        dim_by_w = dim_data.std() / dim_data.mean()  # coefficient of variation per dimension
        dim_by_pc_w = dim_by_w / dim_by_w.sum()  # share of the total
        return pd.DataFrame([dim_by_pc_w.values], columns=dim_by_pc_w.index)
